The NVIDIA System Management Interface (nvidia-smi) is a command line utility, based on top of the NVIDIA Management Library (NVML), intended to aid in the management and monitoring of NVIDIA GPU devices.

In [ ]:
!nvidia-smi
Thu Aug  6 15:01:07 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.57       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

install dependencies:

In [ ]:
# install dependencies: 
# !pip install cython pyyaml==5.1
# !pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# import torch, torchvision
# print(torch.__version__, torch.cuda.is_available())
# !gcc --version


!pip install pyyaml==5.1 pycocotools>=2.0.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab
assert torch.__version__.startswith("1.6")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

Import section

In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import random
import cv2


import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()



from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer

from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog

import os
import json
from detectron2.structures import BoxMode


from google.colab import drive
drive.mount('/content/gdrive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive

Path Define + Load csv file

In [ ]:
train_df = pd.read_csv('/content/gdrive/My Drive/Global Wheat Detection/train.csv')
train_path = "/content/gdrive/My Drive/Global Wheat Detection/train"

Show data frame :

In [ ]:
train_df.head()
Out[ ]:
image_id width height bbox source
0 b6ab77fd7 1024 1024 [834.0, 222.0, 56.0, 36.0] usask_1
1 b6ab77fd7 1024 1024 [226.0, 548.0, 130.0, 58.0] usask_1
2 b6ab77fd7 1024 1024 [377.0, 504.0, 74.0, 160.0] usask_1
3 b6ab77fd7 1024 1024 [834.0, 95.0, 109.0, 107.0] usask_1
4 b6ab77fd7 1024 1024 [26.0, 144.0, 124.0, 117.0] usask_1
In [ ]:
# train_df['width'].unique()
# train_df['height'].unique()

Show boxes example for one random image

In [ ]:
for g in train_df.groupby('image_id'):
    b = g[1]['bbox'].values
    print(type(b),b)
    break
<class 'numpy.ndarray'> ['[0, 654, 37, 111]' '[0, 817, 135, 98]' '[0, 192, 22, 81]'
 '[4, 342, 63, 38]' '[82, 334, 82, 81]' '[30, 296, 48, 49]'
 '[176, 316, 70, 54]' '[176, 126, 69, 51]' '[203, 38, 42, 85]'
 '[3, 142, 89, 58]' '[236, 0, 60, 25]' '[329, 0, 75, 57]'
 '[796, 0, 69, 96]' '[659, 24, 59, 90]' '[540, 81, 140, 80]'
 '[233, 152, 89, 51]' '[422, 159, 58, 50]' '[462, 153, 205, 64]'
 '[468, 210, 108, 53]' '[417, 235, 136, 88]' '[287, 257, 56, 51]'
 '[283, 322, 117, 76]' '[393, 329, 174, 100]' '[606, 346, 47, 57]'
 '[611, 286, 70, 56]' '[718, 305, 54, 69]' '[709, 179, 102, 80]'
 '[813, 191, 120, 65]' '[862, 121, 65, 52]' '[876, 400, 80, 104]'
 '[951, 422, 52, 55]' '[763, 414, 69, 54]' '[633, 462, 77, 45]'
 '[438, 436, 104, 51]' '[356, 448, 65, 50]' '[292, 418, 69, 79]'
 '[251, 528, 75, 62]' '[421, 501, 52, 49]' '[692, 487, 77, 66]'
 '[769, 474, 90, 101]' '[692, 685, 83, 58]' '[611, 710, 72, 92]'
 '[417, 635, 70, 71]' '[706, 768, 103, 57]' '[820, 755, 127, 56]'
 '[899, 730, 83, 52]' '[855, 850, 154, 154]' '[792, 939, 63, 85]'
 '[605, 875, 175, 90]' '[364, 832, 58, 64]' '[246, 929, 139, 77]'
 '[400, 937, 82, 87]' '[471, 899, 63, 60]' '[701, 506, 162, 87]'
 '[552, 404, 87, 74]']

Data Preprocessing

Grouping all the boxes according to the name of the image

In [ ]:
total_data = []
for g in tqdm_notebook(train_df.groupby('image_id')):
    data = {}
    data['filename'] = g[0]
    data['bbox'] = g[1]['bbox'].values
    total_data.append(data)
/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:2: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  

In total there are 3373 images

In [ ]:
len(total_data)
Out[ ]:
3373

Arrange all the images in dictionary - Create Dataset


For standard tasks, we load the original dataset into list[dict] with a specification similar to COCO’s json annotations. This is our standard representation for a dataset.

file_name: the full path to the image file. Rotation or flipping may be applied if the image has EXIF metadata.

height, width: integer. The shape of the image.

image_id(str or int): a unique id that identifies this image. Required by evaluation to identify the images, but a dataset may use it for different purposes.

bbox (list[float]): list of 4 numbers representing the bounding box of the instance.

bbox_mode (int): the format of bbox. It must be a member of structures.BoxMode. Currently supports: BoxMode.XYXY_ABS, BoxMode.XYWH_ABS.

category_id (int): an integer in the range [0, num_categories-1] representing the category label. The value num_categories is reserved to represent the “background” category, if applicable.

annotations (list[dict]): each dict corresponds to annotations of one instance in this image. Required by instance detection/segmentation or keypoint detection tasks, but can be an empty list.

standard-dataset-dicts

In [ ]:
def get_wheat_dicts(total_data):
    
    dataset_dicts = []
    for idx, v in enumerate(total_data):
        record = {}
        
        filename = os.path.join(train_path, v["filename"]+'.jpg')
        height, width = 1024,1024
        
        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
      
        
        objs = []
        for b in v['bbox']:
            b = json.loads(b)
            obj = {
                'bbox': list(b),
                'bbox_mode': BoxMode.XYWH_ABS,
                'category_id':0,
            }
            objs.append(obj)
            
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

Register a Dataset and “Metadata” for Datasets for Train and Val datasets

Register a Dataset - DatasetCatalog

The function can do arbitrary things and should returns the data in either of the following format:

Detectron2’s standard dataset dict, This will make it work with many other builtin features in detectron2, so it’s recommended to use it when it’s sufficient.


Metadata - MetadataCatalog

Metadata is a key-value mapping that contains information that’s shared among the entire dataset, and usually is used to interpret what’s in the dataset, e.g., names of classes, colors of classes, root of files, etc.

If you register a new dataset through DatasetCatalog.register, you may also want to add its corresponding metadata through MetadataCatalog.get(dataset_name).some_key = some_value, to enable any features that need the metadata.

In [ ]:
#split data 90% train 10% to val 
index = int(0.9 * len(total_data))
train_data = total_data[:index]
val_data = total_data[index:]

folders = ['train', 'val']
for i, d in enumerate([train_data,val_data]):
    DatasetCatalog.register("wheat_" + folders[i], lambda d=d: get_wheat_dicts(d))
    MetadataCatalog.get("wheat_" + folders[i]).set(thing_classes=["wheat"])

Displays 3 random images from the train by using Visualizer and MetaData

Visualizer

Visualizer that draws data about detection/segmentation on images.

It contains methods like draw_{text,box,circle,line,binary_mask,polygon} that draw primitive objects to images

visualizer.draw_dataset_dict - Draw annotations/segmentaions in Detectron2 Dataset format.

Args:

dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.  

Returns:

output (VisImage): image object with visualizations. 
In [ ]:
wheat_metadata = MetadataCatalog.get("wheat_train")
In [ ]:
# train_data[0]['bbox'][0]
In [ ]:
dataset_dicts = get_wheat_dicts(train_data)
for d in random.sample(dataset_dicts, 3):
    img = cv2.imread(d["file_name"])
    visualizer = Visualizer(img[:, :, ::-1], metadata=wheat_metadata, scale=1)
    vis = visualizer.draw_dataset_dict(d)
    plt.figure(figsize=[10, 20])
    plt.imshow(vis.get_image()[:,:,::-1])
    plt.show()

RetinaNet


RetinaNet adopts the Feature Pyramid Network (FPN) proposed by Lin, Dollar, et al. (2017) as its backbone, which is in turn built on top of ResNet in a fully convolutional fashion. The fully convolutional nature enables the network to take an image of an arbitrary size and outputs proportionally sized feature maps at multiple levels in the feature pyramid.

alt text

RetinaNet Explained and Demystified


RetinaNet Setup

cfg.merge_from_file - load values from a file yaml cfg.MODEL.WEIGHTS - load WEIGHTS from a file yaml

cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))

cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml")

This is the number of foreground classes, we have only wheat class.

  cfg.MODEL.RETINANET.NUM_CLASSES = 1

Inference cls score threshold, only anchors with score > INFERENCE_TH are considered for inference (to improve speed)

  cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
  cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
  cfg.MODEL.RETINANET.NMS_THRESH_TEST = 0.5

Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets

  cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

Loss parameters

alt text

  cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
  cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
  cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.3
In [ ]:
def retinanet_setup():
  cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml")  
  cfg.MODEL.RETINANET.NUM_CLASSES = 1

  cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05
  cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000
  cfg.MODEL.RETINANET.NMS_THRESH_TEST = 0.5


  cfg.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

  cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0

  cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25

  cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.3
In [ ]:
def faster_rcnn_setup ():
  cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"))
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml")

  cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
  # cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   
  cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

General config for all models in detectron2

In [ ]:
cfg = get_cfg()

retinanet_setup()

# cfg.MODEL.WEIGHTS =  '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs/model_final.pth'
cfg.DATASETS.TRAIN = ("wheat_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 4

cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  

cfg.SOLVER.GAMMA = 0.05
cfg.SOLVER.MAX_ITER = 30000
cfg.SOLVER.MOMENTUM = 0.9
# Save a checkpoint after every this number of iterations
cfg.SOLVER.CHECKPOINT_PERIOD = 10000

cfg.TEST.EVAL_PERIOD =1000
Loading config /usr/local/lib/python3.6/dist-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.

Model training

In this section we use DefaultTrainer to create a trainer , The trainer will train the model according to config set for him in the section above.

In [ ]:
cfg.OUTPUT_DIR = '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs'
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

Load Model

load model with DATASETS.TEST = "wheat_val"

In [ ]:
# cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS =  '/content/gdrive/My Drive/Global Wheat Detection/yonatan_checkpoints/outputs/model_final.pth'
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7 
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
cfg.DATASETS.TEST = ("wheat_val", )
predictor = DefaultPredictor(cfg)

View predictions on multiple images

In [ ]:
from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_wheat_dicts(val_data)
# wheat_metadata = MetadataCatalog.get("wheat_val")
for d in random.sample(dataset_dicts, 3):    
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # print(outputs)
    v = Visualizer(im[:, :, ::-1],     
                   metadata=wheat_metadata, 
                   scale=0.8, 
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=[10, 20])
    plt.imshow(v.get_image()[:,:,::-1])
    plt.show()

Understanding Evaluation Metrics


This competition is evaluated on the mean average precision at different intersection over union (IoU) thresholds.

To understand mAP, we will explain about precision and recall first.

  • Recall is the True Positive Rate i.e. Of all the actual positives, how many are True positives predictions.
  • Precision is the Positive prediction value i.e. Of all the positive predictions, how many are True positives predictions. Read more in evaluation metrics for classification.

mAP

mAP (mean average precision) is the average of AP. In some context, we compute the AP for each class and average them. But in some context, they mean the same thing. For example, under the COCO context, there is no difference between AP and mAP.

Evaluate AR + AP

Evaluate AR for object proposals, AP for instance detection/segmentation, AP for keypoint detection outputs using COCO's metrics

AP (Average precision) is a popular metric in measuring the accuracy of object detectors like Faster R-CNN, SSD, etc. Average precision computes the average precision value for recall value over 0 to 1.

alt text

In [ ]:
#//AP=46.032 BASE_LR = 0.01 TS=0.5 SOLVER.MOMENTUM=0.9 IMS_PER_BATCH = 8 FOCAL_LOSS_GAMMA = 0.9 FOCAL_LOSS_ALPHA = 0.6 SMOOTH_L1_LOSS_BETA = 0.2//


from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
evaluator = COCOEvaluator("wheat_val", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "wheat_val")
inference_on_dataset(predictor.model, val_loader, evaluator)
[08/06 16:11:01 d2.evaluation.coco_evaluation]: 'wheat_val' is not registered by `register_coco_instances`. Therefore trying to convert it to COCO format ...
[08/06 16:11:01 d2.data.datasets.coco]: Converting annotations of dataset 'wheat_val' to COCO format ...)
[08/06 16:11:01 d2.data.datasets.coco]: Converting dataset dicts into COCO format
[08/06 16:11:02 d2.data.datasets.coco]: Conversion finished, #images: 338, #annotations: 15116
[08/06 16:11:02 d2.data.datasets.coco]: Caching COCO format annotations at './output/wheat_val_coco_format.json' ...
[08/06 16:11:03 d2.data.build]: Distribution of instances among all 1 categories:
|  category  | #instances   |
|:----------:|:-------------|
|   wheat    | 15116        |
|            |              |
[08/06 16:11:03 d2.data.common]: Serializing 338 elements to byte tensors and concatenating them all ...
[08/06 16:11:03 d2.data.common]: Serialized dataset takes 0.79 MiB
[08/06 16:11:03 d2.data.dataset_mapper]: Augmentations used in training: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[08/06 16:11:03 d2.evaluation.evaluator]: Start inference on 338 images
[08/06 16:11:05 d2.evaluation.evaluator]: Inference done 11/338. 0.1122 s / img. ETA=0:00:45
[08/06 16:11:10 d2.evaluation.evaluator]: Inference done 50/338. 0.1135 s / img. ETA=0:00:37
[08/06 16:11:15 d2.evaluation.evaluator]: Inference done 89/338. 0.1135 s / img. ETA=0:00:32
[08/06 16:11:20 d2.evaluation.evaluator]: Inference done 126/338. 0.1139 s / img. ETA=0:00:28
[08/06 16:11:25 d2.evaluation.evaluator]: Inference done 165/338. 0.1135 s / img. ETA=0:00:22
[08/06 16:11:31 d2.evaluation.evaluator]: Inference done 204/338. 0.1134 s / img. ETA=0:00:17
[08/06 16:11:36 d2.evaluation.evaluator]: Inference done 242/338. 0.1131 s / img. ETA=0:00:12
[08/06 16:11:41 d2.evaluation.evaluator]: Inference done 274/338. 0.1128 s / img. ETA=0:00:08
[08/06 16:11:46 d2.evaluation.evaluator]: Inference done 315/338. 0.1124 s / img. ETA=0:00:03
[08/06 16:11:49 d2.evaluation.evaluator]: Total inference time: 0:00:44.693250 (0.134214 s / img per device, on 1 devices)
[08/06 16:11:49 d2.evaluation.evaluator]: Total inference pure compute time: 0:00:37 (0.112357 s / img per device, on 1 devices)
[08/06 16:11:49 d2.evaluation.coco_evaluation]: Preparing results for COCO format ...
[08/06 16:11:49 d2.evaluation.coco_evaluation]: Saving results to ./output/coco_instances_results.json
[08/06 16:11:49 d2.evaluation.coco_evaluation]: Evaluating predictions ...
Loading and preparing results...
DONE (t=0.02s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
COCOeval_opt.evaluate() finished in 0.29 seconds.
Accumulating evaluation results...
COCOeval_opt.accumulate() finished in 0.03 seconds.
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.474
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.840
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.481
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.057
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.466
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.518
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.016
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.148
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.527
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.057
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.522
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.570
[08/06 16:11:49 d2.evaluation.coco_evaluation]: Evaluation results for bbox: 
|   AP   |  AP50  |  AP75  |  APs  |  APm   |  APl   |
|:------:|:------:|:------:|:-----:|:------:|:------:|
| 47.354 | 83.986 | 48.131 | 5.699 | 46.615 | 51.845 |
Out[ ]:
OrderedDict([('bbox',
              {'AP': 47.35426124462239,
               'AP50': 83.98597983149305,
               'AP75': 48.13063738128962,
               'APl': 51.84472146634682,
               'APm': 46.614600928733616,
               'APs': 5.6988418622082})])

Submission Section

In [ ]:
from pathlib import Path
# data_dir = Path('/content/gdrive/My Drive/Global Wheat Detection')
# train_img_dir = Path(data_dir / 'train')
test_img_dir = Path('/content/gdrive/My Drive/Global Wheat Detection/test')

sub_path = Path(data_dir / 'sample_submission.csv')
sub_df = pd.read_csv(sub_path)
In [ ]:
from tqdm import tqdm
def submit():
    for idx, row in tqdm(sub_df.iterrows(), total=len(sub_df)):
        img_path = os.path.join(test_img_dir, row.image_id + '.jpg')
        
        img = cv2.imread(img_path)
        outputs = predictor(img)['instances']
        boxes = [i.cpu().detach().numpy() for i in outputs.pred_boxes]
        scores = outputs.scores.cpu().detach().numpy()
        list_str = []
        for box, score in zip(boxes, scores):
            box[3] -= box[1]
            box[2] -= box[0]
            box = list(map(int,box))
            score = round(score, 4)
            list_str.append(score)
            list_str.extend(box)
        sub_df.loc[idx, 'PredictionString'] = ' '.join(map(str, list_str))
        
    return sub_df
In [ ]:
sub_df = submit()
sub_df.to_csv('submission.csv', index=False)
In [ ]:
sub_df